“The simple graph has brought more information to the data analyst’s mind than any other device.”
— John Tukey
gg in “ggplot2” stands for Grammar of Graphics
Stanford Open Policing Project
Police Searches Drop Dramatically in States that Legalized Marijuana
# Load the state-level search data, keep Washington and Colorado, and derive
# two helper columns:
#   legalization_status - "pre" for quarters on or before 2013-01-01, else "post"
#   search_rate_100     - search_rate multiplied by 100
# NOTE(review): the cutoff here is inclusive (<=), while the filter that builds
# `pre_legalization_high` further down uses a strict `<` -- confirm which
# boundary is intended.
stops <- read_csv("./data/opp-search-marijuana_state.csv") %>%
  filter(state %in% c("WA", "CO")) %>%
  mutate(
    # Compare against a real Date instead of relying on implicit coercion of
    # a character string; if_else() also keeps the result type strict.
    legalization_status = if_else(
      quarter <= as.Date("2013-01-01"), "pre", "post"
    ),
    search_rate_100 = search_rate * 100
  )
Exercise: Determine which variable in the dataset is mapped to each aesthetic element of the plot (x-axis, y-axis, etc.).
class: center, middle
# Step 1: only the data is supplied -- no mappings, no layers.
ggplot(data = stops)

# Step 2: map quarter to x and search_rate_100 to y; still no layers.
ggplot(
  data = stops,
  mapping = aes(x = quarter, y = search_rate_100)
)

# Step 3: add a point layer.
ggplot(
  data = stops,
  mapping = aes(x = quarter, y = search_rate_100)
) +
  geom_point()

# Step 4: additionally map driver_race to colour.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_point()
# Replace the points with a smoother; no method is given, so ggplot2 picks
# one and reports it in the message below.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# State the smoothing method explicitly.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_smooth(method = "loess")

# se = FALSE removes the confidence band around each line.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_smooth(method = "loess", se = FALSE)
# Swap the default colour scale for the discrete viridis palette.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_smooth(method = "loess", se = FALSE) +
  scale_color_viridis_d()

# Apply the minimal theme on top.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_smooth(method = "loess", se = FALSE) +
  scale_color_viridis_d() +
  theme_minimal()
# Final step of the build-up: add axis, legend, title, and subtitle labels.
# `stops` is filtered to both WA and CO where it is created, so the title
# names both states rather than Washington alone.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_smooth(method = "loess", se = FALSE) +
  scale_color_viridis_d() +
  theme_minimal() +
  labs(
    x = "Year",
    y = "Search Rate",
    color = "Driver Race",
    title = "Washington and Colorado Highway Patrol Searches",
    subtitle = "Searches Per Hundred stops"
  )
ggplot(data = <DATA>) +
<GEOM_FUNCTION>(mapping = aes(<MAPPINGS>))+
geom_point(mapping = aes(x = displ, y = hwy))
# Map search_rate_100 to point size as well as to y.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100, size = search_rate_100)
) +
  geom_point()

# Same plot with alpha set to a fixed 0.5 for every point.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100, size = search_rate_100)
) +
  geom_point(alpha = 0.5)
Exercise: Using information from https://ggplot2.tidyverse.org/articles/ggplot2-specs.html, add color, size, alpha, and shape aesthetics to your graph. Experiment. Do different things happen when you map aesthetics to discrete and continuous variables? What happens when you use more than one aesthetic?
# Example answer: colour points by driver_race, hide the legend title, and
# put one x-axis break per year.
# `stops` is filtered to both WA and CO where it is created, so the title
# names both states rather than Washington alone.
stops %>%
  ggplot(aes(x = quarter, y = search_rate_100, color = driver_race)) +
  geom_point() +
  theme_minimal(base_size = 12) +
  labs(title = "Washington and Colorado") +
  # Optional palette (disabled). scale_fill for 2d objects like bars,
  # scale_color for lines:
  # scale_color_brewer(type = "qual", palette = "Dark2") +
  theme(legend.title = element_blank()) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y")
# geom level: the aesthetic mapping is supplied inside the geom rather than
# in ggplot().
ggplot(data = stops) +
  geom_point(mapping = aes(x = quarter, y = search_rate_100))
# geoms: x and y are mapped once in ggplot(); colour is mapped only in the
# geom_smooth() layer.
ggplot(data = stops, mapping = aes(x = quarter, y = search_rate_100)) +
  geom_point() +
  geom_smooth(aes(color = driver_race), method = "loess", se = FALSE)
# aes(): colour is placed inside aes(), so it is mapped to a variable
# (driver_race).
ggplot(
  data = stops,
  mapping = aes(
    x = quarter,
    y = search_rate_100,
    color = driver_race
  )
) +
  geom_point()
# aes(): colour is placed outside aes(), so it is set to one fixed value for
# the whole layer.
ggplot(
  data = stops,
  mapping = aes(
    x = quarter,
    y = search_rate_100
  )
) +
  geom_point(color = "red")

# The fixed colour can also be given as a hex code.
ggplot(
  data = stops,
  mapping = aes(
    x = quarter,
    y = search_rate_100
  )
) +
  geom_point(color = "#63B3E8")
# The data frame can be piped into ggplot() ...
stops %>%
  ggplot(aes(x = quarter, y = search_rate_100)) +
  geom_point()

# ... or passed positionally, without naming `data` or `mapping`.
ggplot(stops, aes(x = quarter, y = search_rate_100)) +
  geom_point()
# NOTE(review): color = "blue" sits inside aes(), so ggplot2 treats "blue" as
# a constant data value to map, not as the colour to draw with. This looks
# like a deliberate "common mistake" example -- confirm before changing it.
ggplot(data = stops) +
geom_point(aes(x = quarter, y = search_rate_100, color = "blue"))
Exercise: What is wrong with the following?
# Exercise snippet, shown exactly as presented to participants; it is
# intentionally broken and must not be "fixed" here.
stops %>%
ggplot(aes(x = quarter, y = search_rate_100, color = legalization_status)) %>%
geom_point()
What is wrong with the following?
# Answer: ggplot() is joined to geom_point() with %>% instead of +, which
# produces the error echoed below. Kept broken on purpose.
stops %>%
ggplot(aes(x = quarter, y = search_rate_100, color = legalization_status)) %>%
geom_point()
## Error: `mapping` must be created by `aes()`
## Did you use %>% instead of +?
# A single point layer.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100)
) +
  geom_point()

# Points plus a line layer sharing the same mappings.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100)
) +
  geom_point() +
  geom_line()

# With colour mapped to driver_race in ggplot(), both layers inherit it.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_point() +
  geom_line()
# Smoother with span = 0.2 and no confidence band.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_smooth(span = 0.2, se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# A layer can use its own data: large gray points for rows with
# search_rate_100 below 0.2 are added first, then the regular points.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_point(
    data = stops %>% filter(search_rate_100 < 0.2),
    size = 5,
    color = "gray"
  ) +
  geom_point()
Exercise: Work with your neighbor to sketch what the following plots will look like. No cheating! Do not run the code, just think through the code for the time being.
# Rows dated before 2013-01-01 whose search_rate_100 exceeds 1.
pre_legalization_high <- stops %>%
  filter(quarter < "2013-01-01", search_rate_100 > 1.0)

# Exercise plot: gray highlight points, the regular coloured points, and a
# black text label carrying each highlighted row's search_rate_100.
ggplot(
  stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_point(data = pre_legalization_high, size = 5, color = "gray") +
  geom_point() +
  geom_text(
    data = pre_legalization_high,
    aes(y = search_rate_100, label = search_rate_100),
    size = 2,
    color = "black"
  )
# Walkthrough 1: base scatter plot coloured by driver_race.
ggplot(
  stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_point()

# Walkthrough 2: gray highlight layer added after the regular points.
ggplot(
  stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_point() +
  geom_point(data = pre_legalization_high, size = 5, color = "gray")

# Walkthrough 3: gray highlight layer added before the regular points.
ggplot(
  stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_point(data = pre_legalization_high, size = 5, color = "gray") +
  geom_point()

# Walkthrough 4: label the highlighted rows with their search_rate_100.
ggplot(
  stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_point(data = pre_legalization_high, size = 5, color = "gray") +
  geom_point() +
  geom_text(
    data = pre_legalization_high,
    aes(y = search_rate_100, label = search_rate_100),
    size = 2,
    color = "black"
  )

# Walkthrough 5: same labels, placed 0.05 higher on the y scale.
ggplot(
  stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_point(data = pre_legalization_high, size = 5, color = "gray") +
  geom_point() +
  geom_text(
    data = pre_legalization_high,
    aes(y = search_rate_100 + .05, label = search_rate_100),
    size = 2,
    color = "black"
  )
# Label the highlighted rows with their quarter using geom_text_repel().
ggplot(
  stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_point(data = pre_legalization_high, size = 5, color = "gray") +
  geom_point() +
  geom_text_repel(
    data = pre_legalization_high,
    aes(
      x = quarter,
      y = search_rate_100,
      label = as.character(quarter)
    ),
    size = 3,
    color = "black"
  )

# Same plot using geom_label_repel() in place of geom_text_repel().
ggplot(
  stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_point(data = pre_legalization_high, size = 5, color = "gray") +
  geom_point() +
  geom_label_repel(
    data = pre_legalization_high,
    aes(
      x = quarter,
      y = search_rate_100,
      label = as.character(quarter)
    ),
    size = 3,
    color = "black"
  )
Exercise: How would you fix the following plot?
# Exercise plot, intentionally left unfixed: colour is mapped to driver_race
# in ggplot(), but geom_smooth() also sets a constant color = "blue".
ggplot(stops, aes(x = quarter, y = search_rate_100, color = driver_race)) +
geom_smooth(color = "blue")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Keep colour mapped to driver_race and choose the colours with a manual
# scale of three hex values.
ggplot(
  stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  scale_color_manual(
    values = c("#FF6EB4", "#00BFFF", "#008B8B")
  ) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# facet_wrap(): one panel per state / driver_race combination.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100)
) +
  geom_line() +
  facet_wrap(state ~ driver_race)

# facet_grid(): state on rows, driver_race on columns.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100)
) +
  geom_line() +
  facet_grid(state ~ driver_race)

# facet_grid() with the formula flipped: driver_race on rows, state on columns.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100)
) +
  geom_line() +
  facet_grid(driver_race ~ state)
# Reversed y axis.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_point() +
  scale_y_reverse()

# Square-root y axis.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_point() +
  scale_y_sqrt()

# Continuous y axis with explicitly chosen breaks.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_point() +
  scale_y_continuous(breaks = c(0, 0.25, 0.5, 0.75, 1.0))
# Complete theme: theme_bw().
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_point() +
  theme_bw()

# Complete theme: theme_dark().
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_point() +
  theme_dark()

# Tweak a single theme element: x-axis text set at a 90 degree angle.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_point() +
  theme(axis.text.x = element_text(angle = 90))
# Washington-only smoothed plot, stored for reuse below.
wa_stops <- stops %>%
  filter(state == "WA") %>%
  ggplot(aes(x = quarter, y = search_rate_100, color = driver_race)) +
  geom_smooth(se = FALSE) +
  labs(title = "Washington")

# Colorado-only counterpart with its legend hidden.
co_stops <- stops %>%
  filter(state == "CO") %>%
  ggplot(aes(x = quarter, y = search_rate_100, color = driver_race)) +
  geom_smooth(se = FALSE) +
  labs(title = "Colorado") +
  theme(legend.position = "none")

# Combine the two stored plots with `+`.
wa_stops + co_stops
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Combine the two stored plots with `/`.
wa_stops / co_stops
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Pass the Washington plot to plotly::ggplotly().
plotly::ggplotly(wa_stops)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Final Exercise:
**Bonus:** Add Colorado to the chart using the patchwork library. Play with themes and adjust titles, subtitles, captions, etc.
To really master themes:
https://ggplot2.tidyverse.org/articles/extending-ggplot2.html#creating-your-own-theme
class: center, middle
Make any plot by filling in the parameters of this template
# Embed the ggplot2 template image in the rendered slide.
knitr::include_graphics(path = "./img/ggplot2-template.png")